We want to look at a group of station data and test a strategy for assessing data quality. This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
Get some data to play with.
library(esd)
## Loading required package: ncdf4
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'esd'
## The following object is masked from 'package:base':
##
## subset.matrix
library(utils)
if (!file.exists('precip.norway.rda')) download.file('https://ndownloader.figshare.com/files/2155751','precip.norway.rda')
load('precip.norway.rda')
## The precipitation data is 'Pr'
Now inspect the data - check for missing values
diagnose(Pr)
Estimate annual aggregates:
mu <- annual(Pr,'wetmean')
fw <- annual(Pr,'wetfreq')
map(mu,FUN='trend',new=FALSE)
Plot the original daily data
plot(Pr,new=FALSE)
The wet-day mean precipitation \(\mu\)
plot(mu,new=FALSE)
The wet-day frequency \(f_w\)
plot(fw,new=FALSE)
Apply a principal component analysis (PCA) to the annual aggregates:
pca.mu <- PCA(mu)
pca.fw <- PCA(fw)
Take a copy of the original data and mess it up by introducing deliberate errors and artifacts to degrade the information embedded. The idea is to explore the ways such errors affect the end result.
X <- Pr
## Degrade the original data by synthetically introducing of errors and problems
d <- dim(X)
## Indices with values set to zero
s20.1 <- sample(1:d[1],1000); s20.2 <- sample(1:d[2],10)
## Indices with values set to random
s2r.1 <- sample(1:d[1],1000); s2r.2 <- sample(1:d[2],10)
## Set a random sub-selection of data points to zero or random values
X[s20.1,s20.2] <- 0
X[s2r.1,s2r.2] <- rexp(10000,rate=0.001)
## Set some locations to bad values
S2R <- sample(1:d[2],3)
X[,S2R] <- rexp(d[1],rate=c(1,0.1,0.5))
Estimate synthetic annual aggregates for \(\mu\) and \(f_w\).
mu.s <- annual(X,'wetmean')
fw.s <- annual(X,'wetfreq')
map(mu,FUN='trend',new=FALSE)
Plot the degraded data:
plot(X,new=FALSE)
The aggregated degraded data: \(\mu\)
plot(mu.s,new=FALSE)
and \(f_w\)
plot(fw.s,new=FALSE)
Apply PCA to the degraded data:
pca.smu <- PCA(mu.s)
pca.sfw <- PCA(fw.s)
Plot the leading PCA mode of the original \(\mu\)
plot(pca.mu,new=FALSE)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "plot" is not a
## graphical parameter
Plot the leading PCA mode of the degraded \(\mu\)
plot(pca.smu,new=FALSE)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "plot" is not a
## graphical parameter
Plot the leading PCA mode of the original \(\mu\)
plot(pca.fw,new=FALSE)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "plot" is not a
## graphical parameter
Plot the leading PCA mode of the degraded \(\mu\)
plot(pca.sfw,new=FALSE)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "plot" is not a
## graphical parameter
Retrieve mean sea-level pressure (SLP) and use a regression analysis to examine the data, knowing that the SLP is related to \(f_w\).
#slp <- annual(retrieve('slp.mon.mean.nc',lon=c(-60,30),lat=c(50,70)))
#eof.slp<- EOF(slp)
#save(file='eof.slp.rda',eof.slp)
load('eof.slp.rda')
ds <- DS(subset(pca.fw,pattern=1:3),eof.slp)
##
|
| | 0%
|
|====================== | 33%
## Warning in DS.station(ys, X, biascorrect = biascorrect, m = m, eofs =
## eofs, : DS.station: different indices: Date numeric
##
|
|=========================================== | 67%
## Warning in DS.station(ys, X, biascorrect = biascorrect, m = m, eofs =
## eofs, : DS.station: different indices: Date numeric
##
|
|=================================================================| 100%
## Warning in DS.station(ys, X, biascorrect = biascorrect, m = m, eofs =
## eofs, : DS.station: different indices: Date numeric
ds.s <- DS(subset(pca.sfw,pattern=1:3),eof.slp)
##
|
| | 0%
|
|====================== | 33%
## Warning in DS.station(ys, X, biascorrect = biascorrect, m = m, eofs =
## eofs, : DS.station: different indices: Date numeric
##
|
|=========================================== | 67%
## Warning in DS.station(ys, X, biascorrect = biascorrect, m = m, eofs =
## eofs, : DS.station: different indices: Date numeric
##
|
|=================================================================| 100%
## Warning in DS.station(ys, X, biascorrect = biascorrect, m = m, eofs =
## eofs, : DS.station: different indices: Date numeric
Compare the results:
plot(ds,new=FALSE)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "plot" is not a
## graphical parameter
## NULL
plot(ds.s,new=FALSE)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "plot" is not a
## graphical parameter
## NULL